ct <- factor(Phenotype_met_design,
levels = c("IDHm.DNMT3Am.WT1m.FLT3m.TET2WT",
"IDHm.DNMT3Am.WT1WT.FLT3AWT.TET2WT",
"IDHm.DNMT3Am.WT1WT.FLT3m.TET2WT",
"IDHm.DNMT3AWT.WT1m.FLT3AWT.TET2WT",
"IDHm.DNMT3AWT.WT1WT.FLT3AWT.TET2WT",
"IDHWT.DNMT3Am.WT1WT.FLT3AWT.TET2WT",
"IDHWT.DNMT3Am.WT1WT.FLT3m.TET2WT",
"IDHWT.DNMT3AWT.WT1m.FLT3AWT.TET2WT",
"IDHWT.DNMT3AWT.WT1WT.FLT3AWT.TET2m",
"IDHWT.DNMT3AWT.WT1WT.FLT3AWT.TET2WT",
"IDHWT.DNMT3AWT.WT1WT.FLT3m.TET2WT"),
labels = c("IDHm.DNMT3Am.WT1m.FLT3m",
"IDHm.DNMT3Am",
"IDHm.DNMT3Am.FLT3m",
"IDHm.WT1m",
"IDHm",
"DNMT3Am",
"DNMT3Am.FLT3m",
"WT1m",
"TET2m",
"WT",
"FLT3m"))
design <- model.matrix(~ 0 + ct)
colnames(design) <- levels(ct)
fit <- lmFit(BMIQ_met, design)
contrasts <- makeContrasts(`IDHm-WT` = IDHm - WT,
`DNMT3Am-WT` = DNMT3Am - WT,
`WT1m-WT` = WT1m - WT,
`FLT3m-WT` = FLT3m - WT,
`TET2m-WT` = TET2m - WT,
levels = design)
move <- function(data, cols, ref, side = c("before", "after")) {
if (!requireNamespace("dplyr")) {
stop("Make sure package 'dplyr' is installed to use function 'move'")
}
side <- match.arg(side)
cols <- rlang::enquo(cols)
ref <- rlang::enquo(ref)
if (side == "before") {
dplyr::select(data, 1:!!ref, -!!ref, -!!cols, !!cols, dplyr::everything())
} else {
dplyr::select(data, 1:!!ref, -!!cols, !!cols, dplyr::everything())
}
}
fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend = TRUE)
FitList <- list()
for (i in 1:ncol(contrasts)) {
FitList[[colnames(contrasts)[i]]] <- topTable(fit2, coef = i, number = nrow(BMIQ_met)) %>%
mutate(ID = rownames(.)) %>%
move(., ID, logFC, side = "before") #%>%
# filter(adj.P.Val < 0.05)
}
comparisons <- names(FitList)
Create_heatmap_corr <- function(DATA, remove_cases = 0, Phenotype, corr_method = "pearson") {
name <- ""
if (class(remove_cases) != "logical" ){
number_of_cases <- length(DATA[1,])
remove_cases <- rep(TRUE, number_of_cases)
name = "full_data"
}
DATA_filtered <- DATA[,remove_cases]
Phenotype_filtered <- Phenotype[remove_cases,]
matrix <- as.matrix.data.frame(DATA_filtered)
ann_colors <- list(
DNMT3a = c(DNMT3AWT = "brown1", DNMT3Am = "cyan4"),
IDH = c(IDHWT = "bisque1", IDHm = "coral3"),
WT1 = c(WT1WT = "chartreuse4", WT1m = "brown4"),
FLT3 = c(FLT3AWT = "brown", FLT3m = "darkgoldenrod1"),
TET2 = c(TET2WT = "brown1", TET2m = "cyan4"))
annotation_for_heatmap <- data.frame(IDH = Phenotype_filtered$IDH, DNMT3a = Phenotype_filtered$DNMT3A, WT1 = Phenotype_filtered$WT1, FLT3 = Phenotype_filtered$FLT3, TET2 = Phenotype_filtered$TET2)
rownames(annotation_for_heatmap) <- colnames(DATA_filtered)
corr <- rcorr(matrix, type = corr_method)$r
name <- ifelse(name == "full_data", name, deparse(substitute(remove_cases)))
colnames(corr) <- colnames(DATA_filtered)
heatmap <- pheatmap(corr,
color = colorRampPalette(rev(brewer.pal(n = 11, name = "RdYlBu")))(100),
annotation_col = annotation_for_heatmap,
annotation_colors = ann_colors,
legend = TRUE,
treeheight_row = 20,
main = paste0("Heatmap ", deparse(substitute(DATA)), " ", name, " ", corr_method, " correlation"),
fontsize = 10
)
return(heatmap)
}
heat_BMIQ_all_data <- Create_heatmap_corr(BMIQ_met, Phenotype = Phenotype_met)

single_mutation <- rowSums(design[,c(1:4,7)]) == 0
heat_single_mutation <- Create_heatmap_corr(BMIQ_met, single_mutation, Phenotype_met)

png(filename = "Results/heatmap_BMIQ_allData.png", width = 1920, height = 1080)
heat_BMIQ_all_data
dev.off()
## png
## 2
png(filename = "Results/heatmap_BMIQ_single_mutation.png", width = 1920, height = 1080)
heat_single_mutation
dev.off()
## png
## 2
no_wild_type <- design[,10] == 0
heat_no_WT <- Create_heatmap_corr(BMIQ_met, no_wild_type, Phenotype_met)

png(filename = "Results/heatmap_BMIQ_no_wild_type.png", width = 1920, height = 1080)
heat_no_WT
dev.off()
## png
## 2
no_wild_type_single_mutation <- rowSums(design[,c(1:4,7, 10)]) == 0
heat_no_WT_single_mutation <- Create_heatmap_corr(BMIQ_met, no_wild_type_single_mutation, Phenotype_met)

png(filename = "Results/heatmap_BMIQ_no_wild_type_single_mutation.png", width = 1920, height = 1080)
heat_no_WT_single_mutation
dev.off()
## png
## 2
deconvolution <- read.delim("deconvolution.txt", row.names=1)
Cases <- ALL_GDC_sample$`Case ID` %>% str_replace_all(., "-", ".")
deconvolution <- deconvolution[rownames(deconvolution) %in% Cases,]
MacroPatient <- rownames(deconvolution[deconvolution$uncharacterized.cell > 0.5,])
No_Macro_deconv <- colnames(BMIQ_met) %in% MacroPatient
names(No_Macro_deconv) <- seq(1:length(No_Macro_deconv))
heat_no_M2_patients <- Create_heatmap_corr(DATA = BMIQ_met, remove_cases = No_Macro_deconv, Phenotype = Phenotype_met, corr_method = "pearson")

png(filename = "Results/heat_no_M2_patients.png", width = 1920, height = 1080)
heat_no_M2_patients
dev.off()
## png
## 2
DATA_for_PCA <- cbind(t(BMIQ_met), Phenotype_met)
res.pca <- PCA(DATA_for_PCA, scale.unit = FALSE, ncp = 5, graph = TRUE, quali.sup = c(394047:394051))


png(filename = "Results/Methylation_ellipses_PCA.png", width = 1920, height = 1080)
plotellipses(res.pca, 394047:394051)
dev.off()
## png
## 2
png(filename = "Results/Methylation_barplot_PCA.png", width = 1920, height = 1080)
fviz_eig(res.pca, addlabels = TRUE)
dev.off()
## png
## 2
fviz_eig(res.pca, addlabels = TRUE)

plotellipses(res.pca, 394047:394051)

Filter_Differential_Methylation <- function(DATA, pvalue, logFC_treshold) {
message("[=========================================================]")
message("[<<<<<<<<<<<<<<<<< Filtering Methylation >>>>>>>>>>>>>>>>>]")
message("[<<<<<<<<<< Keep cpgs Differentially Methylated >>>>>>>>>>]")
message("-----------------------------------------------------------")
High_contrast <- DATA[DATA$P.Value < pvalue & DATA$logFC**2 > logFC_treshold**2,]
message("The number of CpGs differentially methylated is ")
print(length(High_contrast[,1]))
return(High_contrast)
}
if (!exists("anno450")){
anno450 <- read.csv("HumanMethylation450_15017482_v1-2.csv", skip = 7) %>% dplyr::select(., "Name", "CHR", "MAPINFO", "UCSC_RefGene_Name", "UCSC_RefGene_Group", "Relation_to_UCSC_CpG_Island")
}
Filtered_methylation_data <- list()
for (i in names(FitList)){
message(paste0("\n=== Comparison ", i, " analyze ==="))
Filtered_methylation_data[[i]] <- Filter_Differential_Methylation(FitList[[i]], pvalue = 0.05, logFC_treshold = 0.3) %>% merge(., anno450, by.x = "ID", by.y = "Name") %>% dplyr::filter(., str_detect(.$ID, "cg"))
}
##
## === Comparison IDHm-WT analyze ===
## [=========================================================]
## [<<<<<<<<<<<<<<<<< Filtering Methylation >>>>>>>>>>>>>>>>>]
## [<<<<<<<<<< Keep cpgs Differentially Methylated >>>>>>>>>>]
## -----------------------------------------------------------
## The number of CpGs differentially methylated is
## [1] 9928
##
## === Comparison DNMT3Am-WT analyze ===
## [=========================================================]
## [<<<<<<<<<<<<<<<<< Filtering Methylation >>>>>>>>>>>>>>>>>]
## [<<<<<<<<<< Keep cpgs Differentially Methylated >>>>>>>>>>]
## -----------------------------------------------------------
## The number of CpGs differentially methylated is
## [1] 1187
##
## === Comparison WT1m-WT analyze ===
## [=========================================================]
## [<<<<<<<<<<<<<<<<< Filtering Methylation >>>>>>>>>>>>>>>>>]
## [<<<<<<<<<< Keep cpgs Differentially Methylated >>>>>>>>>>]
## -----------------------------------------------------------
## The number of CpGs differentially methylated is
## [1] 314
##
## === Comparison FLT3m-WT analyze ===
## [=========================================================]
## [<<<<<<<<<<<<<<<<< Filtering Methylation >>>>>>>>>>>>>>>>>]
## [<<<<<<<<<< Keep cpgs Differentially Methylated >>>>>>>>>>]
## -----------------------------------------------------------
## The number of CpGs differentially methylated is
## [1] 1463
##
## === Comparison TET2m-WT analyze ===
## [=========================================================]
## [<<<<<<<<<<<<<<<<< Filtering Methylation >>>>>>>>>>>>>>>>>]
## [<<<<<<<<<< Keep cpgs Differentially Methylated >>>>>>>>>>]
## -----------------------------------------------------------
## The number of CpGs differentially methylated is
## [1] 2746
enhanced_volcano <- list()
for (i in names(FitList)) {
enhanced_volcano[[i]] <- data.frame(logFC = FitList[[i]]$logFC, pvalue = FitList[[i]]$P.Value, cpgs = FitList[[i]]$ID)
enhanced_volcano[[i]] <- merge(enhanced_volcano[[i]], anno450, by.x = "cpgs", by.y = "Name")
enhanced_volcano[[i]] <- EnhancedVolcano(
toptable = enhanced_volcano[[i]],
lab = enhanced_volcano[[i]]$UCSC_RefGene_Name,
x = "logFC",
y = "pvalue",
FCcutoff = 0.1,
pCutoff = 0.0001,
title = "Volcano plot of the differential\nmethylation of CpGs between WT and Mut",
subtitle = NA,
legendPosition = "right",
subtitleLabSize = 0,
legendLabSize = 10,
ylim = c(0, 10)
)
}
comparisons <- names(enhanced_volcano)
j <- 4
png(file = paste0("Results/", comparisons[j], "_Methylation_beta_values_BMIQ_volcano_plot.png"), width = 1920, height = 1080)
par(mfrow = c(1, 1))
enhanced_volcano[[comparisons[j]]]
dev.off()
## png
## 2
if (!exists("Blueprint_network")) {
Blueprint_network <- read.csv("BLUEPRINT_fragments_good.tsv", sep = "\t")
Blueprint_network <- dplyr::select(Blueprint_network, "chr", "start", "end", "type", "ensembl", "gene_names", "intronic_regions", "type") #%>% filter(., type == "P")
Blueprint_network <- Blueprint_network %>% separate_rows(., gene_names, sep = " ")
Blueprint_GRanges <- GRanges(
seqnames = Blueprint_network$chr,
ranges = IRanges(start = Blueprint_network$start, end = Blueprint_network$end),
type = Blueprint_network$type,
gene_names = Blueprint_network$gene_names
)
}
GRanges_methylations <- list()
match_hit_methylation_promoters <- list()
Promoters_list <- list()
for (i in names(Filtered_methylation_data)) {
GRanges_methylations[[i]] <- GRanges(
seqnames = Filtered_methylation_data[[i]]$CHR,
ranges = IRanges(start = Filtered_methylation_data[[i]]$MAPINFO, end = Filtered_methylation_data[[i]]$MAPINFO + 1),
logFC = Filtered_methylation_data[[i]]$logFC
)
overlaps <- findOverlaps(Blueprint_GRanges, GRanges_methylations[[i]])
match_hit_methylation_promoters[[i]] <- data.frame(mcols(Blueprint_GRanges[queryHits(overlaps)]), as.data.frame(mcols(GRanges_methylations[[i]])[subjectHits(overlaps), ]), stringsAsFactors = T)
colnames(match_hit_methylation_promoters[[i]]) <- c("type", "gene_names", "logFC")
Promoters_list[[i]] <- match_hit_methylation_promoters[[i]][match_hit_methylation_promoters[[i]]$type == "P",]
message(paste0("\n=== Comparison ", i, " analyze ==="))
message("=== CpGs Differentially methylated overlapping Genes promoters ===")
print(length(Promoters_list[[i]]$gene_names))
}
##
## === Comparison IDHm-WT analyze ===
## === CpGs Differentially methylated overlapping Genes promoters ===
## [1] 7479
##
## === Comparison DNMT3Am-WT analyze ===
## === CpGs Differentially methylated overlapping Genes promoters ===
## [1] 960
##
## === Comparison WT1m-WT analyze ===
## === CpGs Differentially methylated overlapping Genes promoters ===
## [1] 295
##
## === Comparison FLT3m-WT analyze ===
## === CpGs Differentially methylated overlapping Genes promoters ===
## [1] 854
##
## === Comparison TET2m-WT analyze ===
## === CpGs Differentially methylated overlapping Genes promoters ===
## [1] 1813
if (!exists("pchic")){
load("pchic.RData")
pchic <- pchic[, c(1:10)]
List_Promoter <- paste(pchic$baitChr, pchic$baitStart, sep = "_")
List_Promoter <- unique(List_Promoter)
colnames(pchic)[c(1:5, 6:10)] <- rep(c("chr", "start", "end", "ID", "Name"), 2)
PCHiC_bed <- unique(rbind(pchic[, c(1:3, 5)], pchic[, c(6:8, 10)]))
PCHiC_GRange <- GRanges(
seqnames = PCHiC_bed$chr,
IRanges(start = PCHiC_bed$start, end = PCHiC_bed$end),
Gene_Pchic = PCHiC_bed$Name
)
PCHiC_GRange$ID <- paste(PCHiC_bed$chr, PCHiC_bed$start, sep = "_")
colnames(pchic) <- c("chr_bait", "start_bait", "end_bait", "ID_bait", "Name_bait", "chr_oe", "start_oe", "end_oe", "ID_oe", "Name_oe")
pchic$IDbait <- paste(pchic$chr_bait, pchic$start_bait, sep = "_")
pchic$IDoe <- paste(pchic$chr_oe, pchic$start_oe, sep = "_")
}
matchit_methylome_pchic <- list()
for (i in names(Filtered_methylation_data)){
overlaps <- findOverlaps(PCHiC_GRange, GRanges_methylations[[i]])
matchit_methylome_pchic[[i]] <- data.frame(mcols(PCHiC_GRange[queryHits(overlaps)]), as.data.frame(mcols(GRanges_methylations[[i]])[subjectHits(overlaps), ]), stringsAsFactors = T)
colnames(matchit_methylome_pchic[[i]]) <- c("Gene_pchic", "ID", "logFC")
matchit_methylome_pchic[[i]]$CpG <- TRUE
message(paste0("\n=== Comparison ", i, " analyze ==="))
message("=== CpGs overlapping PCHiC chromatin fragments ===")
print(length(unique(matchit_methylome_pchic[[i]]$ID)))
}
##
## === Comparison IDHm-WT analyze ===
## === CpGs overlapping PCHiC chromatin fragments ===
## [1] 5011
##
## === Comparison DNMT3Am-WT analyze ===
## === CpGs overlapping PCHiC chromatin fragments ===
## [1] 663
##
## === Comparison WT1m-WT analyze ===
## === CpGs overlapping PCHiC chromatin fragments ===
## [1] 182
##
## === Comparison FLT3m-WT analyze ===
## === CpGs overlapping PCHiC chromatin fragments ===
## [1] 751
##
## === Comparison TET2m-WT analyze ===
## === CpGs overlapping PCHiC chromatin fragments ===
## [1] 1379
CpGs_interaction <- list()
for (i in names(matchit_methylome_pchic)){
CpGs_interaction[[i]] <- unique(rbind(pchic[which(pchic$IDbait %in% matchit_methylome_pchic[[i]]$ID), ], pchic[which(pchic$IDoe %in% matchit_methylome_pchic[[i]]$ID), ]))
message(paste0("\n=== Comparison ", i, " analyze ==="))
message("=== Chromatin fragment connected to CpGs Differentially methylated ===")
ID_chromatin <- unique(c(CpGs_interaction[[i]]$IDbait, CpGs_interaction[[i]]$IDoe))
print(length(ID_chromatin))
}
##
## === Comparison IDHm-WT analyze ===
## === Chromatin fragment connected to CpGs Differentially methylated ===
## [1] 66011
##
## === Comparison DNMT3Am-WT analyze ===
## === Chromatin fragment connected to CpGs Differentially methylated ===
## [1] 11491
##
## === Comparison WT1m-WT analyze ===
## === Chromatin fragment connected to CpGs Differentially methylated ===
## [1] 4436
##
## === Comparison FLT3m-WT analyze ===
## === Chromatin fragment connected to CpGs Differentially methylated ===
## [1] 12893
##
## === Comparison TET2m-WT analyze ===
## === Chromatin fragment connected to CpGs Differentially methylated ===
## [1] 22391
CpGs_interaction_nodes <- list()
match_hit_CpGs_neighbor_promoter <- list()
for (i in names(CpGs_interaction)) {
message(paste0("\n=== Comparison ", i, " analyze ==="))
CpGs_interaction_nodes[[i]] <- CpGs_interaction[[i]][,c(1:3,11,5:8,12,10)]
colnames(CpGs_interaction_nodes[[i]]) <- rep(c("chr", "start", "end", "ID", "Name"),2)
CpGs_interaction_nodes[[i]] <- unique(rbind(CpGs_interaction_nodes[[i]][,c(1:5)], CpGs_interaction_nodes[[i]][,c(6:10)])) %>% merge(., matchit_methylome_pchic[[i]], by.x = "ID", by.y = "ID")
CpGs_interaction_nodes_Granges <- GRanges(
seqnames = CpGs_interaction_nodes[[i]]$chr,
ranges = IRanges(start = CpGs_interaction_nodes[[i]]$start, end = CpGs_interaction_nodes[[i]]$end),
ID = CpGs_interaction_nodes[[i]]$ID,
logFC = CpGs_interaction_nodes[[i]]$logFC
)
overlaps <- findOverlaps(Blueprint_GRanges, CpGs_interaction_nodes_Granges)
match_hit_CpGs_neighbor_promoter[[i]] <- data.frame(mcols(Blueprint_GRanges[queryHits(overlaps)]), as.data.frame(mcols(CpGs_interaction_nodes_Granges)[subjectHits(overlaps), ]), stringsAsFactors = T)
colnames(match_hit_CpGs_neighbor_promoter[[i]]) <- c("type", "gene_names", "ID", "logFC")
message("=== Number of nodes ===")
print(length(match_hit_CpGs_neighbor_promoter[[i]]$type))
message("\n=== Number of nodes corresponding to Promoter ===")
nodes_promoter <- match_hit_CpGs_neighbor_promoter[[i]][match_hit_CpGs_neighbor_promoter[[i]]$type == "P",]
print(length(nodes_promoter$gene_names))
message("\n=== Number of genes having Promoter in the neighbor of CpGs DM ===")
print(length(unique(nodes_promoter$gene_names)))
}
##
## === Comparison IDHm-WT analyze ===
## === Number of nodes ===
## [1] 11640
##
## === Number of nodes corresponding to Promoter ===
## [1] 7482
##
## === Number of genes having Promoter in the neighbor of CpGs DM ===
## [1] 4045
##
## === Comparison DNMT3Am-WT analyze ===
## === Number of nodes ===
## [1] 1454
##
## === Number of nodes corresponding to Promoter ===
## [1] 960
##
## === Number of genes having Promoter in the neighbor of CpGs DM ===
## [1] 610
##
## === Comparison WT1m-WT analyze ===
## === Number of nodes ===
## [1] 372
##
## === Number of nodes corresponding to Promoter ===
## [1] 295
##
## === Number of genes having Promoter in the neighbor of CpGs DM ===
## [1] 210
##
## === Comparison FLT3m-WT analyze ===
## === Number of nodes ===
## [1] 1437
##
## === Number of nodes corresponding to Promoter ===
## [1] 854
##
## === Number of genes having Promoter in the neighbor of CpGs DM ===
## [1] 572
##
## === Comparison TET2m-WT analyze ===
## === Number of nodes ===
## [1] 2638
##
## === Number of nodes corresponding to Promoter ===
## [1] 1819
##
## === Number of genes having Promoter in the neighbor of CpGs DM ===
## [1] 1091
design <- model.matrix(~0 + Phenotype_RNAseq_design)
#Removing heteroscedascity from data
v_expr <- voom(DATA_RNAseq[,-77], design, plot = TRUE)

matrix_rna <- v_expr$E
single_mutation <- design[,c(1:4,7)] == 0
heat_RNAseq_all_data <- Create_heatmap_corr(matrix_rna, Phenotype = Phenotype_RNAseq)

single_mutation <- rowSums(design[,c(1:4,7)]) == 0
heat_single_mutation <- Create_heatmap_corr(matrix_rna, single_mutation, Phenotype_RNAseq)

png(filename = "Results/heatmap_RNAseq_allData.png", width = 1920, height = 1080)
heat_RNAseq_all_data
dev.off()
## png
## 2
png(filename = "Results/heatmap_RNAseq_single_mutation.png", width = 1920, height = 1080)
heat_single_mutation
dev.off()
## png
## 2
no_wild_type <- design[,10] == 0
heat_no_WT <- Create_heatmap_corr(matrix_rna, no_wild_type, Phenotype_RNAseq)

png(filename = "Results/heatmap_RNAseq_no_wild_type.png", width = 1920, height = 1080)
heat_no_WT
dev.off()
## png
## 2
no_wild_type_single_mutation <- rowSums(design[,c(1:4,7, 10)]) == 0
heat_no_WT_single_mutation <- Create_heatmap_corr(matrix_rna, no_wild_type_single_mutation, Phenotype_RNAseq)

png(filename = "Results/heatmap_RNAseq_no_wild_type_single_mutation.png", width = 1920, height = 1080)
heat_no_WT_single_mutation
dev.off()
## png
## 2
MacroPatient <- rownames(deconvolution[deconvolution$uncharacterized.cell > 0.5,])
No_Macro_deconv <- colnames(matrix_rna) %in% MacroPatient
names(No_Macro_deconv) <- seq(1:length(No_Macro_deconv))
heat_no_M2_patients <- Create_heatmap_corr(DATA = matrix_rna, remove_cases = No_Macro_deconv, Phenotype = Phenotype_RNAseq)

png(filename = "Results/heatmap_RNAseq_no_M2_patients.png", width = 1920, height = 1080)
heat_no_M2_patients
dev.off()
## png
## 2
DATA_RNA_for_PCA <- cbind(t(matrix_rna), Phenotype_RNAseq)
res.pca.rna <- PCA(DATA_RNA_for_PCA, scale.unit = FALSE, ncp = 2, graph = TRUE, quali.sup = c(56494:56498))


png(filename = "Results/RNA_ellipses_PCA.png", width = 1920, height = 1080)
plotellipses(res.pca.rna, 56494:56498)
dev.off()
## png
## 2
png(filename = "Results/RNA_barplot_PCA.png", width = 1920, height = 1080)
fviz_eig(res.pca.rna, addlabels = TRUE)
dev.off()
## png
## 2
fviz_eig(res.pca.rna, addlabels = TRUE)

plotellipses(res.pca.rna, 56494:56498)

DGEs <- function(data, Phenotype){
require(limma)
require(dplyr)
#require(dendextend)
message("[===========================]")
message("[<<<<<<< DGEs START >>>>>>>>>]")
message("[<<<< Pairwise analysis >>>>>]")
message("-----------------------------")
# This function creates the pairs for the pairwise matrices
design.pairs <- function(levels) {
n <- length(levels)
design <- matrix(0,n,choose(n,2))
rownames(design) <- levels
colnames(design) <- 1:choose(n,2)
k <- 0
for (i in 1:(n - 1))
for (j in (i + 1):n) {
k <- k + 1
design[i,k] <- 1
design[j,k] <- -1
colnames(design)[k] <- paste(levels[i], "-", levels[j],sep = "")
}
design
}
# This function creates the pairs for the pairwise matrices
design <- model.matrix(~0 + Phenotype)
#Removing heteroscedascity from data
v_expr <- voom(data, design, plot = FALSE)
contr.matrix <- design.pairs(levels(factor(Phenotype)))
colnames(design) <- rownames(contr.matrix)
# Fitting linear models for comparisons of interest
Fit <- lmFit(v_expr, design) %>%
contrasts.fit(., contr.matrix) %>%
eBayes(., trend = TRUE)
FitList <- list()
for (i in 1:ncol(contr.matrix)) {
FitList[[i]] <- topTable(Fit, coef = i, adjust.method = "BH", number = nrow(v_expr$E)) %>%
mutate(ID = rownames(.))
message(paste0(i, " done"))
}
names(FitList) <- colnames(contr.matrix)
return(FitList)
}
DGE <- DGEs(data = DATA_RNAseq[,1:(length(DATA_RNAseq[1,])-1)], Phenotype_RNAseq_design)
## [===========================]
## [<<<<<<< DGEs START >>>>>>>>>]
## [<<<< Pairwise analysis >>>>>]
## -----------------------------
## 1 done
## 2 done
## 3 done
## 4 done
## 5 done
## 6 done
## 7 done
## 8 done
## 9 done
## 10 done
## 11 done
## 12 done
## 13 done
## 14 done
## 15 done
## 16 done
## 17 done
## 18 done
## 19 done
## 20 done
## 21 done
## 22 done
## 23 done
## 24 done
## 25 done
## 26 done
## 27 done
## 28 done
## 29 done
## 30 done
## 31 done
## 32 done
## 33 done
## 34 done
## 35 done
## 36 done
## 37 done
## 38 done
## 39 done
## 40 done
## 41 done
## 42 done
## 43 done
## 44 done
## 45 done
## 46 done
## 47 done
## 48 done
## 49 done
## 50 done
## 51 done
## 52 done
## 53 done
## 54 done
## 55 done
name_comparison <- function(name) {
name_splited <- strsplit(name, split = "[-]")[[1]]
name_left <- name_splited[1]
name_right <- name_splited[2]
name_left <- strsplit(name_left, split = "[.]")[[1]]
name_right <- strsplit(name_right, split = "[.]")[[1]]
name_cleaned_left <- name_left[which(str_detect(name_left, "m"))]
name_cleaned_right <- name_right[which(str_detect(name_right, "m"))]
ifelse(isEmpty(name_cleaned_right), name_cleaned_right <- "WT", TRUE)
ifelse(isEmpty(name_cleaned_left), name_cleaned_left <- "WT", TRUE)
name_cleaned_left <- paste(as.character(name_cleaned_left), collapse = ".")
name_cleaned_right <- paste(as.character(name_cleaned_right), collapse = ".")
name_cleaned <- paste(name_cleaned_left, name_cleaned_right, collapse = "-", sep = "-")
return(name_cleaned)
}
for (i in seq(length(DGE))) {
names(DGE)[[i]] <- name_comparison(names(DGE)[[i]])
}
DGE[["WT-FLT3m"]]$logFC <- -DGE[["WT-FLT3m"]]$logFC
names(DGE)[55] <- "FLT3m-WT"
DGE_comparison_of_interest <- list()
comparison_of_interest <- c("IDHm-WT", "WT1m-WT", "DNMT3Am-WT", "FLT3m-WT", "TET2m-WT")
for (i in comparison_of_interest) {
DGE_comparison_of_interest[[i]] <- DGE[[i]]
}
if (!exists("ensembl")){
ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
}
add_genes_coordinates <- function(vector_of_genes) {
# Download homo sapiens genes ensembl database
#
coordinates <- getBM(attributes = c('ensembl_gene_id', 'hgnc_symbol', 'entrezgene_id'), filters = 'ensembl_gene_id', values = vector_of_genes$ID, mart = ensembl)
genes_annoted <- merge(x = vector_of_genes, y = coordinates, by.x = "ID", by.y = "ensembl_gene_id", all.x = TRUE)
return(genes_annoted)
}
Genes_DE <- list()
List_genes_DE <- list()
List_genes_DE[["up"]] <- list()
for (i in names(DGE_comparison_of_interest)) {
message(paste0("\n=== Comparison ", i, " analyze ==="))
DGE_comparison_of_interest[[i]] <- add_genes_coordinates(DGE_comparison_of_interest[[i]])
Genes_DE[[i]] <- DGE_comparison_of_interest[[i]] %>% filter(., (logFC**2 > 2.25 & P.Value < 0.05 & hgnc_symbol != "")) %>% dplyr::select(., logFC, AveExpr, P.Value, hgnc_symbol)
List_genes_DE[["up"]][[i]] <- as.vector(Genes_DE[[i]] %>% filter(., logFC > 0) %>% dplyr::select(., hgnc_symbol))$hgnc_symbol
List_genes_DE[["down"]][[i]] <- as.vector(Genes_DE[[i]] %>% filter(., logFC < 0) %>% dplyr::select(., hgnc_symbol))$hgnc_symbol
write.csv(x = List_genes_DE[["up"]][[i]], file = paste0("Results/", i, "_up_regulated.csv"), row.names = FALSE)
write.csv(x = List_genes_DE[["down"]][[i]], file = paste0("Results/", i, "_down_regulated.csv"), row.names = FALSE)
write.csv(x = Genes_DE[[i]], file = paste0("Results/", i, "_DE.csv"), row.names = FALSE)
}
##
## === Comparison IDHm-WT analyze ===
##
## === Comparison WT1m-WT analyze ===
##
## === Comparison DNMT3Am-WT analyze ===
##
## === Comparison FLT3m-WT analyze ===
##
## === Comparison TET2m-WT analyze ===
venn.diagram(List_genes_DE[["up"]], filename = "Results/Genes_up_regulated_compared_to_WT.png", width = 1920, height = 1080, imagetype = "png")
## [1] 1
venn.diagram(List_genes_DE[["down"]], filename = "Results/Genes_down_regulated_compared_to_WT.png", width = 1920, height = 1080, imagetype = "png")
## [1] 1
up_entrez <- list()
down_entrez <- list()
for (i in names(DGE_comparison_of_interest)) {
up_entrez[[i]] <- DGE_comparison_of_interest[[i]][DGE_comparison_of_interest[[i]]$logFC > 0 & DGE_comparison_of_interest[[i]]$P.Value < 0.05,]
down_entrez[[i]] <- DGE_comparison_of_interest[[i]][DGE_comparison_of_interest[[i]]$logFC < 0 & DGE_comparison_of_interest[[i]]$P.Value < 0.05,]
}
ego_up <- list()
ego_down <- list()
ego <- list()
for (i in names(DGE_comparison_of_interest)) {
message("========================================================")
message("[-----------------Comparison starting-------------------]")
message(paste0("[--- Comparison: ", i, " in progress ---------]"))
ego_up[[i]] <- enrichGO(
gene = unique(up_entrez[[i]]$hgnc_symbol)[-1],
keyType = "SYMBOL",
OrgDb = "org.Hs.eg.db",
ont = "ALL",
pAdjustMethod = "none"
)
ego_down[[i]] <- enrichGO(
gene = unique(down_entrez[[i]]$hgnc_symbol)[-1],
keyType = "SYMBOL",
OrgDb = "org.Hs.eg.db",
ont = "ALL",
pAdjustMethod = "none"
)
# ego[[i]] <- enrichGO(
# gene = c(down_entrez[[i]][["Nodes"]]$hgnc_symbol, up_entrez[[i]][["Nodes"]]$hgnc_symbol),
# keyType = "SYMBOL",
# OrgDb = "org.Hs.eg.db",
# ont = "ALL",
# pAdjustMethod = "none"
# )
}
## ========================================================
## [-----------------Comparison starting-------------------]
## [--- Comparison: IDHm-WT in progress ---------]
## ========================================================
## [-----------------Comparison starting-------------------]
## [--- Comparison: WT1m-WT in progress ---------]
## ========================================================
## [-----------------Comparison starting-------------------]
## [--- Comparison: DNMT3Am-WT in progress ---------]
## ========================================================
## [-----------------Comparison starting-------------------]
## [--- Comparison: FLT3m-WT in progress ---------]
## ========================================================
## [-----------------Comparison starting-------------------]
## [--- Comparison: TET2m-WT in progress ---------]
j <- 5
png(filename = paste0("Results/", comparisons[j], "_up_ego.png"), width = 1920, height = 1080)
dotplot(ego_up[[j]], showCategory = 50 ) + ggtitle(paste0(comparisons[j], " GO terms, Gene Upregulated"))
dev.off()
## png
## 2
png(filename = paste0("Results/", comparisons[j], "_down_ego.png"), width = 1920, height = 1080)
dotplot(ego_down[[j]], showCategory = 50 ) + ggtitle(paste0(comparisons[j], " GO terms, Gene Downregulated"))
dev.off()
## png
## 2
DGE_Blueprint <- list()
for (i in names(DGE_comparison_of_interest)) {
DGE_Blueprint[[i]] <- merge(DGE_comparison_of_interest[[i]], Blueprint_network, by.x = "ID", by.y = "ensembl")
}
find_overlap_between_genes_and_chromatine_network <- function(DATA, bed_file) {
message("\n[====================================================================================================]")
message("[<< Overlap differentially expressed genes promoter coordinates to chromatine fragment coordinates >>]")
message("[<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Need Promoter Capture Hi-C >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>]")
message("------------------------------------------------------------------------------------------------------\n")
message(paste0("[------------- Comparison: ", i, " in progress ---------]"))
Genes_Ranges <- GRanges(
seqnames = DATA$chr,
ranges = IRanges(start = DATA$start, end = DATA$end),
genes_from_ensembl = DATA$hgnc_symbol,
logFC = DATA$logFC,
pvalue = DATA$P.Value
)
overlaps <- findOverlaps(bed_file, Genes_Ranges)
match_hit <- data.frame(mcols(bed_file[queryHits(overlaps)]), as.data.frame(mcols(Genes_Ranges)[subjectHits(overlaps), ]), stringsAsFactors = T)
return(match_hit)
}
matchit_RNAseq <- list()
Significatif <- list()
for (i in names(DGE_Blueprint)) {
matchit_RNAseq[[i]] <- find_overlap_between_genes_and_chromatine_network(DGE_Blueprint[[i]], PCHiC_GRange)
message("=== Number of Genes Differentially expressed with |logFC| > 1.5 & Pvalue < 0.05 ===")
Significatif[[i]] <- matchit_RNAseq[[i]][matchit_RNAseq[[i]]$logFC**2 > 1.5**2 & matchit_RNAseq[[i]]$pvalue < 0.05,]
print(length(unique(Significatif[[i]]$genes_from_ensembl)))
}
##
## [====================================================================================================]
## [<< Overlap differentially expressed genes promoter coordinates to chromatine fragment coordinates >>]
## [<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Need Promoter Capture Hi-C >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>]
## ------------------------------------------------------------------------------------------------------
## [------------- Comparison: IDHm-WT in progress ---------]
## === Number of Genes Differentially expressed with |logFC| > 1.5 & Pvalue < 0.05 ===
## [1] 407
##
## [====================================================================================================]
## [<< Overlap differentially expressed genes promoter coordinates to chromatine fragment coordinates >>]
## [<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Need Promoter Capture Hi-C >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>]
## ------------------------------------------------------------------------------------------------------
## [------------- Comparison: WT1m-WT in progress ---------]
## === Number of Genes Differentially expressed with |logFC| > 1.5 & Pvalue < 0.05 ===
## [1] 136
##
## [====================================================================================================]
## [<< Overlap differentially expressed genes promoter coordinates to chromatine fragment coordinates >>]
## [<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Need Promoter Capture Hi-C >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>]
## ------------------------------------------------------------------------------------------------------
## [------------- Comparison: DNMT3Am-WT in progress ---------]
## === Number of Genes Differentially expressed with |logFC| > 1.5 & Pvalue < 0.05 ===
## [1] 116
##
## [====================================================================================================]
## [<< Overlap differentially expressed genes promoter coordinates to chromatine fragment coordinates >>]
## [<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Need Promoter Capture Hi-C >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>]
## ------------------------------------------------------------------------------------------------------
## [------------- Comparison: FLT3m-WT in progress ---------]
## === Number of Genes Differentially expressed with |logFC| > 1.5 & Pvalue < 0.05 ===
## [1] 1184
##
## [====================================================================================================]
## [<< Overlap differentially expressed genes promoter coordinates to chromatine fragment coordinates >>]
## [<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Need Promoter Capture Hi-C >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>]
## ------------------------------------------------------------------------------------------------------
## [------------- Comparison: TET2m-WT in progress ---------]
## === Number of Genes Differentially expressed with |logFC| > 1.5 & Pvalue < 0.05 ===
## [1] 373
for (i in comparisons) {
message(paste0("\n[------------------------------------------- Comparison: ", i, " in progress -------------------------------------------]"))
message("=== Number of CpGs differentially methylated in promoter of Genes Differentially expressed ===")
Promoter_DE_CpG_DM <- Significatif[[i]][Significatif[[i]]$genes_from_ensembl %in% Promoters_list[[i]]$gene_names,]
print(length(Promoter_DE_CpG_DM$genes_from_ensembl))
message("=== Number of CpGs Up methylated in promoter of Genes down regulated ===")
CpGs_up <- Promoters_list[[i]] %>% dplyr::filter(., logFC > 0)
Promoter_DR_CPG_UM <- Significatif[[i]][Significatif[[i]]$genes_from_ensembl %in% CpGs_up$gene_names,] %>% filter(., logFC < 0)
print(length(unique(Promoter_DR_CPG_UM$genes_from_ensembl)))
print(unique(Promoter_DR_CPG_UM$genes_from_ensembl))
message("=== Number of CpGs Down methylated in promoter of Genes up regulated ===")
CpGs_down <- Promoters_list[[i]] %>% dplyr::filter(., logFC < 0)
Promoter_UR_CPG_DM <- Significatif[[i]][Significatif[[i]]$genes_from_ensembl %in% CpGs_down$gene_names,] %>% filter(., logFC > 0)
print(length(unique(Promoter_UR_CPG_DM$genes_from_ensembl)))
print(unique(Promoter_UR_CPG_DM$genes_from_ensembl))
message("\n=== Number of Genes differentially expressed in the neighbor of CpGs ===")
Promoter_DE_Neighbor <- Significatif[[i]][Significatif[[i]]$genes_from_ensembl %in% match_hit_CpGs_neighbor_promoter[[i]]$gene_names,]
print(length(Promoter_DE_Neighbor$genes_from_ensembl))
}
##
## [------------------------------------------- Comparison: IDHm-WT in progress -------------------------------------------]
## === Number of CpGs differentially methylated in promoter of Genes Differentially expressed ===
## [1] 711
## === Number of CpGs Up methylated in promoter of Genes down regulated ===
## [1] 64
## [1] "AGTRAP" "CDA" "COL9A2" "HENMT1" "SEMA4A" "CD1D"
## [7] "MAP10" "PDLIM1" "UTF1" "C1QTNF4" "MS4A3" "MPZL2"
## [13] "SLC37A2" "ZNF385A" "GTSF1" "SERP2" "CEBPE" "GALC"
## [19] "LTK" "PTX4" "MEFV" "IL27" "CD300LB" "CD300LD"
## [25] "LGALS3BP" "ANGPTL6" "MAG" "CLC" "SULT2B1" "CLEC11A"
## [31] "QPCT" "DAPL1" "CXCR2" "CD93" "C20orf197" "S100B"
## [37] "CX3CR1" "SPATA12" "CD96" "GP9" "HLA-DMB" "SUSD3"
## [43] "FBP1" "PHYHD1" "IL2RG" "ZNF185" "DHRS3" "CACNA2D4"
## [49] "SLC9A3R2" "CMTM2" "SLFN13" "SIGLEC9" "VSTM1" "PTH2R"
## [55] "LINC00649" "S100P" "TBC1D9" "LY86-AS1" "LY86" "TNF"
## [61] "HLA-DPA1" "ADGB" "MACC1" "AK8"
## === Number of CpGs Down methylated in promoter of Genes up regulated ===
## [1] 3
## [1] "F2RL1" "PCDHB15" "CLIC2"
##
## === Number of Genes differentially expressed in the neighbor of CpGs ===
## [1] 1150
##
## [------------------------------------------- Comparison: DNMT3Am-WT in progress -------------------------------------------]
## === Number of CpGs differentially methylated in promoter of Genes Differentially expressed ===
## [1] 129
## === Number of CpGs Up methylated in promoter of Genes down regulated ===
## [1] 0
## character(0)
## === Number of CpGs Down methylated in promoter of Genes up regulated ===
## [1] 3
## [1] "MAATS1" "CPED1" "DCSTAMP"
##
## === Number of Genes differentially expressed in the neighbor of CpGs ===
## [1] 259
##
## [------------------------------------------- Comparison: WT1m-WT in progress -------------------------------------------]
## === Number of CpGs differentially methylated in promoter of Genes Differentially expressed ===
## [1] 7
## === Number of CpGs Up methylated in promoter of Genes down regulated ===
## [1] 2
## [1] "ZSCAN1" "ZNF135"
## === Number of CpGs Down methylated in promoter of Genes up regulated ===
## [1] 0
## character(0)
##
## === Number of Genes differentially expressed in the neighbor of CpGs ===
## [1] 13
##
## [------------------------------------------- Comparison: FLT3m-WT in progress -------------------------------------------]
## === Number of CpGs differentially methylated in promoter of Genes Differentially expressed ===
## [1] 359
## === Number of CpGs Up methylated in promoter of Genes down regulated ===
## [1] 10
## [1] "CD34" "ZNF695" "SLC38A1" "ZNF154" "ITGA9" "CD200" "CACNB4"
## [8] "ITGA6" "PTH2R" "SLCO5A1"
## === Number of CpGs Down methylated in promoter of Genes up regulated ===
## [1] 19
## [1] "ARTN" "QSOX1" "FCAMR" "PROX1" "MS4A6A" "MMP19"
## [7] "HAL" "SLC10A2" "DTNA" "HNMT" "CSTA" "MARVELD2"
## [13] "KCNN2" "TREM1" "TBX20" "PENK" "ONECUT2" "MEIS1"
## [19] "LINC00899"
##
## === Number of Genes differentially expressed in the neighbor of CpGs ===
## [1] 740
##
## [------------------------------------------- Comparison: TET2m-WT in progress -------------------------------------------]
## === Number of CpGs differentially methylated in promoter of Genes Differentially expressed ===
## [1] 277
## === Number of CpGs Up methylated in promoter of Genes down regulated ===
## [1] 7
## [1] "F3" "ANXA2" "TUBG2" "MPP6" "OPHN1" "PGM1" "PTH2R"
## === Number of CpGs Down methylated in promoter of Genes up regulated ===
## [1] 9
## [1] "NRXN3" "ALS2" "ITIH1" "BTLA" "KIT" "RD3" "NR2F2" "RNF212"
## [9] "SYCP2L"
##
## === Number of Genes differentially expressed in the neighbor of CpGs ===
## [1] 668